# %% [markdown]
# # iNeuron Internship Project : Swiggy Data Analysis
# <h2 align='center'>By : Pradeep Sehrawat </h2>
# <img src="https://entrackr-bucket.s3.ap-south-1.amazonaws.com/wp-content/uploads/2022/02/10123018/Swiggy-img.jpg" width="500" height="400" align="center"/>

# %%
# library import
import pandas as pd
import numpy as np
import random as rnd

# for data visualization
import seaborn as sns
import matplotlib.pyplot as plt
# IPython magic, kept as a comment so the file stays valid Python;
# notebook front-ends that read percent-format scripts re-enable it.
# %matplotlib inline
import plotly.express as px

import warnings
warnings.filterwarnings(action = 'ignore')

# %%
# Load the outlet table straight from GitHub (needs network access).
df = pd.read_csv("https://raw.githubusercontent.com/pradeep3114/DataSets/main/Swiggy%20Bangalore%20Outlet%20Details.csv")
df

# %%
df.describe()

# %%
df.info()

# %%
df.isnull().sum()

# %%
df['Shop_Name'].unique()

# %%
df.Shop_Name.value_counts()

# %%
df.Rating.unique()

# %%
df.dtypes

# %%
# "Rating" arrives as text; the '--' placeholder is mapped to '0' before
# the cast to float.
# NOTE(review): '--' presumably marks outlets without a rating; turning it
# into 0 lowers the mean/min reported below - confirm this is intended.
df["Rating"] = df["Rating"].str.replace('--', '0').astype(float)

# %%
df.Rating.unique()

# %%
df.Cost_for_Two.unique()

# %%
a = df["Cost_for_Two"]
a.unique()

# %%
# Drop the first two characters of every entry
# (presumably a currency symbol plus a space - verify against the CSV).
a = a.str[2:]
a

# %%
a = a.astype(int)
a

# %%
df["Cost_for_Two"] = a
df["Cost_for_Two"]

# %%
df.info()

# %%
df.describe()

# %% [markdown]
# # Observation
# 1. Mean Rating for Swiggy is 4.061 which is good for Customers.
# 2. Average Cost for two person is approximately 321 rupees.
# 3. Max Rating and Cost for two is 4.8 and 800 rupees respectively.
# %%
# Shops that have minimum cost and maximum rating

# %%
# Outlets ordered from highest to lowest rating; show the first five rows.
df.sort_values("Rating", ascending=False).head(5)

# %%
# Outlets ordered from cheapest to most expensive; show the first five rows.
df.sort_values("Cost_for_Two").head(5)

# %% [markdown]
# # Observation
# 1. Most liked food is from Khichdi Experiment, which has Home Food,
#    Healthy Food and Indian cuisines and has the highest rating of 4.8
# 2. Ice cream business is more liked in Koramangala
# 3. Minimum cost for two is from shop "Maa di Hatti"
# %%
# Cost for two per shop (one bar per shop name, labels turned vertical).
plt.figure(figsize=(15, 15))
plt.xticks(rotation=90)
sns.barplot(x=df["Shop_Name"], y=df["Cost_for_Two"])

# %%
# Rating per shop, same layout as the cost chart.
plt.figure(figsize=(15, 15))
plt.xticks(rotation=90)
sns.barplot(x=df["Shop_Name"], y=df["Rating"])

# %%
df["Rating"].describe()

# %%
df["Rating"].sort_values(ascending=True)

# %%
L = df["Location"].unique()
L

# %%
loc = []


def dis():
    """Collect the last comma-separated piece of every ``Location`` value.

    Reads the module-level ``df`` and appends to the module-level ``loc``
    list in place, then returns that same list. The pieces are not
    stripped, so whitespace following the last comma is kept.
    """
    loc.extend(address.split(",")[-1] for address in df["Location"])
    return loc


l = dis()
l

# %%
# Distinct area labels and how many there are.
a = set(l)
len(a)

# %%
a

# %%
# Attach the extracted area label to every outlet row.
df["Area"] = l
df

# %% [markdown]
# **Area Koramangala**
# %%
# Rows whose area label is ' Koramangala' (the label carries a leading space).
Koramangala = df.loc[df["Area"].eq(" Koramangala")]
Koramangala

# %%
Koramangala.describe()

# %%
# Rating per Koramangala shop.
plt.figure(figsize=(10, 10))
plt.xticks(rotation=90)
sns.barplot(x=Koramangala["Shop_Name"], y=Koramangala["Rating"])

# %%
# Distribution of cost for two in Koramangala.
sns.histplot(Koramangala["Cost_for_Two"], bins=15)

# %%
# Distribution of ratings in Koramangala.
sns.histplot(Koramangala["Rating"], bins=10)

# %% [markdown]
# **Area Jayanagar**
# %%
# Rows whose area label is ' Jayanagar' (the label carries a leading space).
Jayanagar = df.loc[df["Area"].eq(" Jayanagar")]
Jayanagar

# %% [markdown]
# **Area HSR**
# %%
# Rows whose area label is ' HSR' (the label carries a leading space).
HSR = df.loc[df["Area"].eq(" HSR")]
HSR

# %%
HSR.describe()

# %%
# Rating per HSR shop.
plt.figure(figsize=(10, 10))
plt.xticks(rotation=90)
sns.barplot(x=HSR["Shop_Name"], y=HSR["Rating"])

# %%
# Distribution of cost for two in HSR.
sns.histplot(HSR["Cost_for_Two"], bins=10)

# %%
# Distribution of ratings in HSR.
sns.histplot(HSR["Rating"], bins=10)

# %% [markdown]
# **Area BTM**
# %%
# Rows whose area label is ' BTM' (the label carries a leading space).
BTM = df[df['Area'] == ' BTM']
BTM

# %%
# Summary statistics for the BTM subset itself.
BTM.describe()

# %%
# Rating per BTM shop, shop names turned vertical so they stay readable.
plt.figure(figsize=(10,10))
plt.xticks(rotation=90)
sns.barplot(x=BTM['Shop_Name'], y=BTM.Rating)

# %%
# Distribution of cost for two in BTM.
sns.histplot(BTM['Cost_for_Two'], bins=10)

# %%
# Distribution of ratings in BTM.
sns.histplot(BTM['Rating'], bins=10)

# %% [markdown]
# # Conclusion:
# As we can see, the area-wise Rating & Cost for Two vary as follows:
#
# BTM : Most has 4.0 to 4.2 Rating and Approx. Cost for Two People lies between 200 to 350. (Max. Cost goes upto 600)
#
# HSR : Most has 4 or above Rating and Approx. Cost for Two People lies between 300 to 400. (Max. Cost goes upto 800)
#
# Koramangala : Most has 4.0 to 4.3 Rating and Approx. Cost for Two People lies between 200 to 350. (Max. Cost goes upto 600)
#
# With this we can conclude the Most Costly Area is HSR
# %%
# Per-area total of the listed "Cost_for_Two" values. This is the figure the
# notebook calls "revenue"; it is a sum of menu prices, not of actual sales.
Revenue = {
    'BTM': BTM['Cost_for_Two'].sum(),
    'HSR': HSR['Cost_for_Two'].sum(),
    'Jayanagar': Jayanagar['Cost_for_Two'].sum(),
    'Koramangala': Koramangala['Cost_for_Two'].sum(),
}
# Materialise the dict views as lists so the frame is built from plain
# sequences; column order (Revenue, City) is unchanged.
Re = list(Revenue.values())
city = list(Revenue.keys())

Revenue = pd.DataFrame({'Revenue': Re, 'City': city})
Revenue

# %%
# City on the x axis, summed cost on the y axis; the axis captions follow
# the data that is actually plotted on each axis.
sns.barplot(x='City', y='Revenue', data=Revenue)
plt.xlabel('City ------>', fontsize=15)
plt.ylabel('Revenue ------>', fontsize=15)
plt.show()

# %%
# Number of outlets per area. The column is passed by keyword because
# seaborn treats the first positional argument as ``data``.
sns.countplot(x=df.Area)
plt.title("Number of shop In Area")

# %% [markdown]
# # Conclusion:
# Maximum Revenue is from Koramangala and Maximum number of shops are from Koramangala
# %%
df.Cuisine

# %%
# Distinct "Cuisine" strings; each one is a comma-separated list of cuisines.
w = df.Cuisine.unique()
w

# %%
# Ordered list of distinct cuisine names found in those strings.
cu = []
for combo in w:
    for token in combo.split(","):
        # Drop a single leading space; ``startswith`` is safe on an empty
        # token (e.g. after a trailing comma), unlike indexing ``token[0]``.
        name = token[1:] if token.startswith(" ") else token
        if name not in cu:
            cu.append(name)
cu

# %%
# How often each cuisine name occurs across the distinct cuisine strings.
# NOTE(review): this iterates ``unique()`` values, so outlets sharing an
# identical cuisine string are counted once, while the per-area sections
# count every outlet row. The conclusion below quotes figures produced by
# this method, so it is left unchanged - confirm which is intended.
dict_Cuisine = {}
for combo in df['Cuisine'].unique():
    for token in combo.split(','):
        name = token.lstrip(" ")
        dict_Cuisine[name] = dict_Cuisine.get(name, 0) + 1

print(dict_Cuisine)
print()
print('Total Records: \t', len( dict_Cuisine))

# %%
Cuisine = list(dict_Cuisine.keys())
freq = list(dict_Cuisine.values())

# %%
df_Cuisine_Analysis = pd.DataFrame()

# %%
df_Cuisine_Analysis['Cuisine'] = Cuisine
df_Cuisine_Analysis['Count'] = freq

# %%
df_Cuisine_Analysis

# %%
# One bar per cuisine name.
plt.figure(figsize = (20, 8))
sns.barplot(x = 'Cuisine', y = 'Count', data = df_Cuisine_Analysis)
plt.xticks(rotation = 90)
plt.title('Cuisines Overall Analysis (Bangalore)', fontsize = 14, fontweight = 'bold', fontstyle = 'italic')
plt.xlabel('Cuisine', fontsize = 11, fontweight = 'bold')
plt.ylabel('Number of Restaurants', fontsize = 11, fontweight = 'bold')
plt.show()

# %%
# Ten most frequent cuisines.
df_Cuisine_sort = df_Cuisine_Analysis.sort_values(by='Count', ascending=False).head(10)
df_Cuisine_sort

# %%
# Share of each of those ten cuisines, printed as a percentage on the wedge.
plt.pie(df_Cuisine_sort['Count'], labels=df_Cuisine_sort['Cuisine'], autopct='%.2f%%')
plt.show()

# %% [markdown]
# # Conclusion:
# 1. From the above Visualizations, we can say most of the Restaurants sell
#    "Chinese", which is around 19.89%, followed by "North Indian" &
#    "South Indian" Cuisines, which are around 18.18% & 13.07%.
# 2. We can also infer that most of the people are fond of these Cuisines.

# %% [markdown]
# **BTM Area**
# %%
BTM_cuisine = BTM["Cuisine"]
BTM_cuisine

# %%
# Tally every cuisine name over all BTM outlets; each "Cuisine" entry is a
# comma-separated list and leading spaces on a name are removed.
BTM_dict = {}
for entry in BTM_cuisine:
    for raw_name in entry.split(","):
        name = raw_name.lstrip(' ')
        BTM_dict[name] = BTM_dict.get(name, 0) + 1

print(BTM_dict)
print()
print('Total Records of BTM: \t', len(BTM_dict))

# %%
# Two-column table: cuisine name and its tally, in first-seen order.
BTM_Cuisine = pd.DataFrame(
    {
        'Cuisine': list(BTM_dict.keys()),
        'Count': list(BTM_dict.values()),
    }
)
BTM_Cuisine

# %%
# One bar per cuisine name found in BTM.
plt.figure(figsize=(20, 8))
sns.barplot(x=BTM_Cuisine['Cuisine'], y=BTM_Cuisine['Count'], data=BTM_Cuisine)
plt.xticks(rotation=90)
plt.title('Cuisines Analysis - BTM', fontsize=14, fontweight='bold', fontstyle='italic')
plt.xlabel('Cuisine', fontsize=11, fontweight='bold')
plt.ylabel('Number of Restaurants', fontsize=11, fontweight='bold')
plt.show()

# %%
# Same table, most frequent cuisine first.
cu_BTM = BTM_Cuisine.sort_values(by="Count", ascending=False)
cu_BTM

# %%
# Interactive pie chart with the share and name written inside each wedge.
fig = px.pie(
    data_frame=cu_BTM,
    names=cu_BTM['Cuisine'],
    values=cu_BTM['Count'],
    title='Distribution of Cuisines in Area BTM Restaurants',
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

# %% [markdown]
# # Conclusion:
# From the above Visualizations, we can say that in BTM Area most of the
# Restaurants sell "Chinese", which is around '16.4%', followed by
# "North Indian" & "South Indian" Cuisines, which are around '16.4%' & '9.48%'.
#
# So, we can also infer that most of the people are fond of these Cuisines.
# %% [markdown]
# **Area HSR**

# %%
HSR_cuisine = HSR["Cuisine"]
HSR_cuisine

# %%
# Tally every cuisine name over all HSR outlets; each "Cuisine" entry is a
# comma-separated list and leading spaces on a name are removed.
HSR_dict = {}
for entry in HSR_cuisine:
    for raw_name in entry.split(","):
        name = raw_name.lstrip(' ')
        HSR_dict[name] = HSR_dict.get(name, 0) + 1

print(HSR_dict)
print()
print('Total Records of HSR: \t', len(HSR_dict))

# %%
# Two-column table: cuisine name and its tally, in first-seen order.
HSR_Cuisine = pd.DataFrame(
    {
        'Cuisine': list(HSR_dict.keys()),
        'Count': list(HSR_dict.values()),
    }
)
HSR_Cuisine

# %%
# One bar per cuisine name found in HSR.
plt.figure(figsize=(20, 8))
sns.barplot(x=HSR_Cuisine['Cuisine'], y=HSR_Cuisine['Count'], data=HSR_Cuisine)
plt.xticks(rotation=90)
plt.title('Cuisines Analysis - HSR', fontsize=14, fontweight='bold', fontstyle='italic')
plt.xlabel('Cuisine', fontsize=11, fontweight='bold')
plt.ylabel('Number of Restaurants', fontsize=11, fontweight='bold')
plt.show()

# %%
# Same table, most frequent cuisine first.
cu_HSR = HSR_Cuisine.sort_values(by="Count", ascending=False)
cu_HSR

# %%
# Interactive pie chart with the share and name written inside each wedge.
fig = px.pie(
    data_frame=cu_HSR,
    names=cu_HSR['Cuisine'],
    values=cu_HSR['Count'],
    title='Distribution of Cuisines in Area HSR Restaurants',
)
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

# %% [markdown]
# # Conclusion:
# We can say that in HSR Area "North Indian" Cuisines dominate with around
# '17.4%', followed by "Chinese" & "South Indian" Cuisines with '10.9%' &
# '8.7%' of Restaurants respectively.
#
# In HSR Area, we may have more "North Indian" people staying there.
# %% [markdown]
# **Koramangala Area**

# %%
Koramangala_cuisine = Koramangala["Cuisine"]
Koramangala_cuisine

# %%
# Tally every cuisine name over all Koramangala outlets; each "Cuisine"
# entry is a comma-separated list and leading spaces on a name are removed.
Koramangala_dict = {}
for entry in Koramangala_cuisine:
    for raw_name in entry.split(","):
        name = raw_name.lstrip(' ')
        Koramangala_dict[name] = Koramangala_dict.get(name, 0) + 1

print(Koramangala_dict)
print()
# The label names the area this cell actually summarises.
print('Total Records of Koramangala: \t', len(Koramangala_dict))

# %%
# Two-column table: cuisine name and its tally, in first-seen order.
Cuisine_Koramangala = list(Koramangala_dict.keys())
freq_Koramangala = list(Koramangala_dict.values())
dict_Koramangala = {
    'Cuisine': Cuisine_Koramangala,
    'Count': freq_Koramangala,
}
Koramangala_Cuisine = pd.DataFrame(dict_Koramangala)
Koramangala_Cuisine

# %%
# One bar per cuisine name found in Koramangala.
plt.figure(figsize = (20, 8))
sns.barplot(x = 'Cuisine', y = 'Count', data = Koramangala_Cuisine)
plt.xticks(rotation = 90)
plt.title('Cuisines Analysis - Koramangala', fontsize = 14, fontweight = 'bold', fontstyle = 'italic')
plt.xlabel('Cuisine', fontsize = 11, fontweight = 'bold')
plt.ylabel('Number of Restaurants', fontsize = 11, fontweight = 'bold')
plt.show()

# %%
# Same table, most frequent cuisine first.
cu_Koramangala = Koramangala_Cuisine.sort_values(by="Count", ascending=False)
cu_Koramangala

# %%
# Interactive pie chart. ``data_frame`` is the Koramangala table, the same
# frame the ``names`` and ``values`` columns are taken from.
fig = px.pie(
    data_frame = cu_Koramangala,
    names = 'Cuisine',
    values = 'Count',
    title = 'Distribution of Cuisines in Area Koramangala Restaurants',
)
fig.update_traces(textposition = 'inside', textinfo = 'percent+label')
fig.show()

# %% [markdown]
# # Conclusion:
# We can say that in Koramangala Area "North Indian" Cuisines dominate with
# around '12%', followed by "Chinese" Cuisine at 10.8% & "South Indian"
# Cuisine at 7.78%
#
# So, we can also infer that most of the people are fond of the
# "North Indian" Cuisine
# %%
df

# %%
df['Cuisine']

# %%
# All cuisine strings joined into one text, the input for the word cloud.
df_Swiggy_Text = ', '.join(df['Cuisine'])
df_Swiggy_Text

# %%
# Notebook shell command, kept as a comment so the file stays valid Python.
# Run it in a notebook cell or a terminal if the package is missing.
# pip install wordcloud

# %%
from wordcloud import WordCloud

# %%
import nltk
from nltk.corpus import stopwords

# %%
import warnings
warnings.filterwarnings(action = 'ignore')

# %%
# English stop words to leave out of the cloud. The corpus is fetched on
# first use when it is not installed locally (needs network access once).
try:
    stopwords_ENG = set(stopwords.words('english'))
except LookupError:
    nltk.download('stopwords')
    stopwords_ENG = set(stopwords.words('english'))
stopwords_ENG

# %%
wordcloud = WordCloud(width = 1000, height = 500, stopwords = stopwords_ENG, background_color = 'white').generate(df_Swiggy_Text)

plt.figure(figsize = (22, 7))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()

# %%
# Save the Image in the 'img' Folder:
# The folder is resolved relative to the working directory and created when
# absent, so the cell does not depend on one machine's directory layout.
from pathlib import Path

img_dir = Path("img")
img_dir.mkdir(parents=True, exist_ok=True)
wordcloud.to_file(str(img_dir / "Cuisines.png"))